COVID-19

Data Source: JHU CSSE JHU CSSE Github

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
%matplotlib inline
import seaborn as sns
import plotly.express as px
from plotly.subplots import make_subplots
sns.set(context="notebook", style="darkgrid", palette="deep", font="sans-serif", font_scale=1, color_codes=True)
In [33]:
%%html
<style>
div.input {
    display:none;
}
</style>
In [2]:
from sklearn.linear_model import LinearRegression
from datetime import timedelta
import plotly.graph_objects as go
import scipy.optimize as opt
from plotly.subplots import make_subplots

from statsmodels.api import OLS
In [3]:
data_src="https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/"
cfm=pd.read_csv(data_src+"time_series_covid19_confirmed_global.csv")
dt=pd.read_csv(data_src+"time_series_covid19_deaths_global.csv")
##rc=pd.read_csv(data_src+"time_series_19-covid-Recovered.csv")
In [4]:
def trans_df(df, valname):
    out=pd.DataFrame(columns=["Country/Region","Province/State","Lat","Long",
                             "LastUpdated",valname])
    for j in df.columns[4:]:
        nval=pd.notnull(df[j])
        blk=df[nval][["Country/Region","Province/State","Lat","Long"]]
        blk['LastUpdated']=pd.to_datetime(j)
        blk[valname]=df[nval][j]
        out=out.append(blk,ignore_index=True,sort=False)
        
    out[valname]=out[valname].astype(int)    
    return(out)
In [5]:
df_cfm=trans_df(cfm,"Confirmed")
df_dt=trans_df(dt,"Death")
##df_rc=trans_df(rc,"Recovered")

df=pd.merge(df_cfm,df_dt,on=['Country/Region','Province/State','Lat','Long','LastUpdated'],how='left')
##df=pd.merge(df,df_rc,on=['Country/Region','Province/State','Lat','Long','LastUpdated'],how='left')

df.rename(columns={'Country/Region':'Country', 'Province/State':'Region','LastUpdated':'Date_updated'},inplace=True)
df.Region.fillna('',inplace=True)

df['Date_updated']=pd.to_datetime(df['Date_updated'])

#df=df[df.Date_updated <= '2020-03-28']
In [6]:
#fix BC data issue
df.loc[(df.Region=="British Columbia") & (df.Date_updated=='2020-03-17'), ['Confirmed','Death']]=[186,7]
df.loc[(df.Region=="British Columbia") & (df.Date_updated=='2020-03-18'), ['Confirmed','Death']]=[231,7]
df.loc[(df.Region=="British Columbia") & (df.Date_updated=='2020-03-19'), ['Confirmed','Death']]=[271,8]
df.loc[(df.Region=="British Columbia") & (df.Date_updated=='2020-03-20'), ['Confirmed','Death']]=[348,9]
df.loc[(df.Region=="British Columbia") & (df.Date_updated=='2020-03-22'), ['Confirmed','Death']]=[472,12]
df.loc[(df.Region=="British Columbia") & (df.Date_updated=='2020-03-23'), ['Confirmed','Death']]=[539,12]
df.loc[(df.Region=="British Columbia") & (df.Date_updated=='2020-03-25'), ['Confirmed','Death']]=[659,14]
df.loc[(df.Region=="British Columbia") & (df.Date_updated=='2020-03-27'), ['Confirmed','Death']]=[792,16]
df.loc[(df.Region=="British Columbia") & (df.Date_updated=='2020-03-29'), ['Confirmed','Death']]=[900,17]



df.loc[(df.Region=="Alberta") & (df.Date_updated=='2020-03-10'), ['Confirmed','Death']]=[16,0]
df.loc[(df.Region=="Alberta") & (df.Date_updated=='2020-03-11'), ['Confirmed','Death']]=[24,0]
df.loc[(df.Region=="Alberta") & (df.Date_updated=='2020-03-12'), ['Confirmed','Death']]=[26,0]
df.loc[(df.Region=="Alberta") & (df.Date_updated=='2020-03-13'), ['Confirmed','Death']]=[34,0]
df.loc[(df.Region=="Alberta") & (df.Date_updated=='2020-03-14'), ['Confirmed','Death']]=[53,0]
df.loc[(df.Region=="Alberta") & (df.Date_updated=='2020-03-15'), ['Confirmed','Death']]=[63,0]
df.loc[(df.Region=="Alberta") & (df.Date_updated=='2020-03-16'), ['Confirmed','Death']]=[86,0]
df.loc[(df.Region=="Alberta") & (df.Date_updated=='2020-03-17'), ['Confirmed','Death']]=[100,0]
df.loc[(df.Region=="Alberta") & (df.Date_updated=='2020-03-18'), ['Confirmed','Death']]=[128,0]
df.loc[(df.Region=="Alberta") & (df.Date_updated=='2020-03-19'), ['Confirmed','Death']]=[155,0]
df.loc[(df.Region=="Alberta") & (df.Date_updated=='2020-03-20'), ['Confirmed','Death']]=[199,1]
df.loc[(df.Region=="Alberta") & (df.Date_updated=='2020-03-21'), ['Confirmed','Death']]=[237,1]
df.loc[(df.Region=="Alberta") & (df.Date_updated=='2020-03-22'), ['Confirmed','Death']]=[280,1]
df.loc[(df.Region=="Alberta") & (df.Date_updated=='2020-03-23'), ['Confirmed','Death']]=[331,1]
df.loc[(df.Region=="Alberta") & (df.Date_updated=='2020-03-24'), ['Confirmed','Death']]=[395,1]
df.loc[(df.Region=="Alberta") & (df.Date_updated=='2020-03-25'), ['Confirmed','Death']]=[472,2]
df.loc[(df.Region=="Alberta") & (df.Date_updated=='2020-03-26'), ['Confirmed','Death']]=[525,2]
df.loc[(df.Region=="Alberta") & (df.Date_updated=='2020-03-27'), ['Confirmed','Death']]=[608,2]
df.loc[(df.Region=="Alberta") & (df.Date_updated=='2020-03-28'), ['Confirmed','Death']]=[661,2]
df.loc[(df.Region=="Alberta") & (df.Date_updated=='2020-03-29'), ['Confirmed','Death']]=[682,2]
df.loc[(df.Region=="Alberta") & (df.Date_updated=='2020-03-30'), ['Confirmed','Death']]=[690,8]
In [7]:
##df[(df.Region=="British Columbia")].tail(15)
df[(df.Region=="Alberta")].tail(25)
#df[(df.Region=="Alberta")].tail(15)
##df[(df.Country=="Canada") & (df.Date_updated == '2020-03-29')].Confirmed.sum()
##df[(df.Country=="Canada") & (df.Date_updated == '2020-03-28')].sort_values('Confirmed', ascending=False)
Out[7]:
Country Region Lat Long Date_updated Confirmed Death
11211 Canada Alberta 53.9333 -116.5765 2020-03-06 1 0
11465 Canada Alberta 53.9333 -116.5765 2020-03-07 2 0
11719 Canada Alberta 53.9333 -116.5765 2020-03-08 4 0
11973 Canada Alberta 53.9333 -116.5765 2020-03-09 7 0
12227 Canada Alberta 53.9333 -116.5765 2020-03-10 16 0
12481 Canada Alberta 53.9333 -116.5765 2020-03-11 24 0
12735 Canada Alberta 53.9333 -116.5765 2020-03-12 26 0
12989 Canada Alberta 53.9333 -116.5765 2020-03-13 34 0
13243 Canada Alberta 53.9333 -116.5765 2020-03-14 53 0
13497 Canada Alberta 53.9333 -116.5765 2020-03-15 63 0
13751 Canada Alberta 53.9333 -116.5765 2020-03-16 86 0
14005 Canada Alberta 53.9333 -116.5765 2020-03-17 100 0
14259 Canada Alberta 53.9333 -116.5765 2020-03-18 128 0
14513 Canada Alberta 53.9333 -116.5765 2020-03-19 155 0
14767 Canada Alberta 53.9333 -116.5765 2020-03-20 199 1
15021 Canada Alberta 53.9333 -116.5765 2020-03-21 237 1
15275 Canada Alberta 53.9333 -116.5765 2020-03-22 280 1
15529 Canada Alberta 53.9333 -116.5765 2020-03-23 331 1
15783 Canada Alberta 53.9333 -116.5765 2020-03-24 395 1
16037 Canada Alberta 53.9333 -116.5765 2020-03-25 472 2
16291 Canada Alberta 53.9333 -116.5765 2020-03-26 525 2
16545 Canada Alberta 53.9333 -116.5765 2020-03-27 608 2
16799 Canada Alberta 53.9333 -116.5765 2020-03-28 661 2
17053 Canada Alberta 53.9333 -116.5765 2020-03-29 682 2
17307 Canada Alberta 53.9333 -116.5765 2020-03-30 690 8
In [8]:
tmp=df[['Date_updated','Country','Region','Confirmed','Death']].copy()
tmp['Date_updated_1']= tmp['Date_updated']+timedelta(days=1)
new=pd.merge(tmp,tmp,how='left',
         left_on=['Country','Region','Date_updated'],
             right_on=['Country','Region','Date_updated_1'],
             suffixes=['_current','_before'])

new['Confirmed_added']=new['Confirmed_current']-new['Confirmed_before']
new['Death_added']=new['Death_current']-new['Death_before']
new.rename(columns={'Date_updated_current':'Date_updated'},inplace=True)

df=pd.merge(df,new[['Date_updated','Country','Region','Confirmed_added','Death_added']],
         how='left',on=['Date_updated','Country','Region'])
In [9]:
df[(df.Country=='Canada') & (df.Date_updated=='2020-03-30')]
Out[9]:
Country Region Lat Long Date_updated Confirmed Death Confirmed_added Death_added
17307 Canada Alberta 53.9333 -116.5765 2020-03-30 690 8 8.0 6.0
17308 Canada British Columbia 49.2827 -123.1207 2020-03-30 970 19 70.0 2.0
17309 Canada Grand Princess 37.6489 -122.6655 2020-03-30 13 0 0.0 0.0
17310 Canada Manitoba 53.7609 -98.8139 2020-03-30 96 1 24.0 0.0
17311 Canada New Brunswick 46.5653 -66.4619 2020-03-30 68 0 2.0 0.0
17312 Canada Newfoundland and Labrador 53.1355 -57.6604 2020-03-30 148 1 13.0 1.0
17313 Canada Nova Scotia 44.6820 -63.7443 2020-03-30 127 0 5.0 0.0
17314 Canada Ontario 51.2538 -85.3232 2020-03-30 1706 31 351.0 10.0
17315 Canada Prince Edward Island 46.5107 -63.4168 2020-03-30 18 0 7.0 0.0
17316 Canada Quebec 52.9399 -73.5491 2020-03-30 3430 22 590.0 0.0
17317 Canada Saskatchewan 52.9399 -106.4509 2020-03-30 156 2 0.0 2.0
17503 Canada Diamond Princess 0.0000 0.0000 2020-03-30 0 1 0.0 0.0
17510 Canada Recovered 0.0000 0.0000 2020-03-30 0 0 0.0 0.0
17517 Canada Northwest Territories 64.8255 -124.8457 2020-03-30 1 0 0.0 0.0
17518 Canada Yukon 64.2823 -135.0000 2020-03-30 4 0 0.0 0.0
In [10]:
df_country=df.groupby(['Country','Date_updated'])[['Confirmed','Death','Confirmed_added','Death_added']].sum().reset_index()
df_now=df_country.groupby('Country')[['Confirmed','Death']].max().reset_index()
df_now[df_now.Country=='Canada']
Out[10]:
Country Confirmed Death
31 Canada 7427 85
In [11]:
dfa=df_country[df_country.Country.isin(df_now[df_now.Confirmed > 300]['Country'])].copy()
dfa.loc[:,'StartDate']=-999
for j in dfa.Country.unique():
    sdate=dfa.loc[(dfa.Country==j) & (dfa.Confirmed > 100),'Date_updated'].min()
    ##print(f"Country: {j} case number: {dfa[(dfa.Country==j) & (dfa.Confirmed > 100)]['Confirmed'].min()} start date {sdate}")
    tmp=dfa.loc[dfa.Country==j,'Date_updated']-sdate
    dfa.loc[dfa.Country==j,'StartDate']=tmp.dt.days + 1
    if j == 'China':
          dfa.loc[dfa.Country==j,'StartDate']+=5
In [12]:
#countrylist=allcountrylist[:10]
#countrylist.append('Japan')
countrylist=[
'Spain',
 'Germany',
 'Iran',
 'France',
 'Switzerland',
 'United Kingdom',
 'Japan',
 'Singapore']

#countrylist.remove('China').remove('Korea, South').remove('Italy')
fig=px.line(log_y=True,range_x=[0,40],range_y=[100,400000],width=1000, height=500)
#fig=px.line(dfa[dfa.Country.isin(countrylist)],x='StartDate',y='Confirmed',color='Country',log_y=True
#            ,range_x=[0,40],range_y=[100,100000])

cty='China'
clr='blue'
fig.add_scatter(x=dfa.loc[dfa.Country==cty,'StartDate'],y=dfa.loc[dfa.Country==cty,'Confirmed']
                ,mode="lines",name=cty,line=dict(width=2, color=clr))

cty='Italy'
clr='orange'
fig.add_scatter(x=dfa.loc[dfa.Country==cty,'StartDate'],y=dfa.loc[dfa.Country==cty,'Confirmed']
                ,mode="lines",name=cty,line=dict(width=2, color=clr))


cty='US'
clr='green'
fig.add_scatter(x=dfa.loc[dfa.Country==cty,'StartDate'],y=dfa.loc[dfa.Country==cty,'Confirmed']
                ,mode="lines",name=cty,line=dict(width=2,color=clr))

cty='Spain'
fig.add_scatter(x=dfa.loc[dfa.Country==cty,'StartDate'],y=dfa.loc[dfa.Country==cty,'Confirmed']
                ,mode="lines",name=cty,line=dict(width=2))

cty='Germany'
fig.add_scatter(x=dfa.loc[dfa.Country==cty,'StartDate'],y=dfa.loc[dfa.Country==cty,'Confirmed']
                ,mode="lines",name=cty,line=dict(width=2))


cty='Iran'
fig.add_scatter(x=dfa.loc[dfa.Country==cty,'StartDate'],y=dfa.loc[dfa.Country==cty,'Confirmed']
                ,mode="lines",name=cty,line=dict(width=2))

cty='France'
fig.add_scatter(x=dfa.loc[dfa.Country==cty,'StartDate'],y=dfa.loc[dfa.Country==cty,'Confirmed']
                ,mode="lines",name=cty,line=dict(width=2))


cty='Korea, South'
clr='yellow'
fig.add_scatter(x=dfa.loc[dfa.Country==cty,'StartDate'],y=dfa.loc[dfa.Country==cty,'Confirmed']
                ,mode="lines",name=cty,line=dict(width=2,color=clr))



cty='Switzerland'
fig.add_scatter(x=dfa.loc[dfa.Country==cty,'StartDate'],y=dfa.loc[dfa.Country==cty,'Confirmed']
                ,mode="lines",name=cty,line=dict(width=2))


cty='United Kingdom'
fig.add_scatter(x=dfa.loc[dfa.Country==cty,'StartDate'],y=dfa.loc[dfa.Country==cty,'Confirmed']
                ,mode="lines",name=cty,line=dict(width=2))


cty='Canada'
clr='red'
fig.add_scatter(x=dfa.loc[dfa.Country==cty,'StartDate'],y=dfa.loc[dfa.Country==cty,'Confirmed']
                ,mode="lines+markers",name=cty,marker=dict(size=8,color=clr),line=dict(width=4,color=clr))

cty='Japan'
fig.add_scatter(x=dfa.loc[dfa.Country==cty,'StartDate'],y=dfa.loc[dfa.Country==cty,'Confirmed']
                ,mode="lines",name=cty,line=dict(width=2))

cty='Singapore'
fig.add_scatter(x=dfa.loc[dfa.Country==cty,'StartDate'],y=dfa.loc[dfa.Country==cty,'Confirmed']
                ,mode="lines",name=cty,line=dict(width=2))

cty='Czechia'
fig.add_scatter(x=dfa.loc[dfa.Country==cty,'StartDate'],y=dfa.loc[dfa.Country==cty,'Confirmed']
                ,mode="lines",name=cty,line=dict(width=2))

cty='Denmark'
fig.add_scatter(x=dfa.loc[dfa.Country==cty,'StartDate'],y=dfa.loc[dfa.Country==cty,'Confirmed']
                ,mode="lines",name=cty,line=dict(width=2))

cty='Sweden'
fig.add_scatter(x=dfa.loc[dfa.Country==cty,'StartDate'],y=dfa.loc[dfa.Country==cty,'Confirmed']
                ,mode="lines",name=cty,line=dict(width=2))

fig.update_layout(
    title="COVID-19 Cases by Country",
    xaxis_title="Number of Days since 100th Case",
    yaxis_title="Confirmed Case",
    font=dict(
        family="Courier New, monospace",
        size=13,
        color="#7f7f7f"
    )
)

fig.show()

fig.write_image("trend_by_country"+ dfa.Date_updated.max().strftime('_%m_%d_%Y')+".jpeg")
In [13]:
s1_cnd=dfa.loc[(dfa.Country=='Canada') & (dfa.Confirmed > 1000),'Date_updated'].min()


fig=px.line(log_y=True,range_x=['2020-03-08','2020-06-01'],range_y=[100,100000],width=1000, height=500)




cty='China'
fig.add_scatter(x=dfa.loc[dfa.Country==cty,'Date_updated']-dfa.loc[(dfa.Country==cty) & 
                (dfa.Confirmed > 1000),'Date_updated'].min()+s1_cnd,y=dfa.loc[dfa.Country==cty,'Confirmed']
                ,mode="lines",name=cty + "(Date Adjusted)" ,line=dict(width=4))


cty='Korea, South'
fig.add_scatter(x=dfa.loc[dfa.Country==cty,'Date_updated']-dfa.loc[(dfa.Country==cty) & 
                (dfa.Confirmed > 1000),'Date_updated'].min()+s1_cnd,y=dfa.loc[dfa.Country==cty,'Confirmed']
                ,mode="lines",name=cty + "(Date Adjusted)",line=dict(width=4))

cty='US'
fig.add_scatter(x=dfa.loc[dfa.Country==cty,'Date_updated']-dfa.loc[(dfa.Country==cty) & 
                (dfa.Confirmed > 1000),'Date_updated'].min()+s1_cnd,y=dfa.loc[dfa.Country==cty,'Confirmed']
                ,mode="lines",name=cty+ "(Date Adjusted)",line=dict(width=4))

cty='Italy'
fig.add_scatter(x=dfa.loc[dfa.Country==cty,'Date_updated']-dfa.loc[(dfa.Country==cty) & 
                (dfa.Confirmed > 1000),'Date_updated'].min()+s1_cnd,y=dfa.loc[dfa.Country==cty,'Confirmed']
                ,mode="lines",name=cty+ "(Date Adjusted)",line=dict(width=4))

cty='Canada'
fig.add_scatter(x=dfa.loc[dfa.Country==cty,'Date_updated']-dfa.loc[(dfa.Country==cty) & 
                (dfa.Confirmed > 1000),'Date_updated'].min()+s1_cnd,y=dfa.loc[dfa.Country==cty,'Confirmed']
                ,mode="lines+markers",name=cty,marker=dict(size=10))






fig.update_layout(
    title="Canada COVID-19 Case",
    xaxis_title="Date",
    yaxis_title="Confirmed Case",
    font=dict(
        family="Courier New, monospace",
        size=13,
        color="#7f7f7f"
    )
)

fig.show()
fig.write_image("canada_trend"+ dfa.Date_updated.max().strftime('_%m_%d_%Y')+".jpeg")
In [14]:
pred=pd.DataFrame({'Date_updated':[ s1_cnd  + timedelta(days=x-10) for x in range(100)]})

pred=pred.merge(dfa.loc[dfa.Country=='Canada',['Date_updated','Confirmed']], how='left',on='Date_updated')


dcan=dfa.loc[dfa.Country=='Canada',['Date_updated','Confirmed']].tail(5)
x=np.arange(10)
xx=np.array([np.repeat(1,10),x]).transpose()
mod=OLS(np.log(dcan['Confirmed']), xx[:5]).fit()
pdx=mod.get_prediction(xx)
predmean=np.exp(pdx.predicted_mean)
conf=np.exp(pdx.conf_int(alpha=0.05))

out=pd.DataFrame({'Date_updated': dcan['Date_updated'].append(dcan['Date_updated']+timedelta(days=5)), 
                  'predict':predmean, 
                  'cf_low':conf[:,0],'cf_high':conf[:,1]})

pred=pred.merge(out,how='left',on='Date_updated')


cty='US'
tmp=dfa.loc[dfa.Country==cty,['Date_updated','Confirmed']]
tmp['Date_updted_'+cty]=tmp['Date_updated']
tmp['Date_updated']=tmp['Date_updated'] - tmp.loc[tmp.Confirmed > 1000, 'Date_updated'].min() + s1_cnd
tmp.rename(columns={"Confirmed": cty + " - Date Adjusted"}, inplace=True)
pred=pred.merge(tmp,how='left', on='Date_updated')

cty='Italy'
tmp=dfa.loc[dfa.Country==cty,['Date_updated','Confirmed']]
tmp['Date_updted_'+cty]=tmp['Date_updated']
tmp['Date_updated']=tmp['Date_updated'] - tmp.loc[tmp.Confirmed > 1000, 'Date_updated'].min() + s1_cnd
tmp.rename(columns={"Confirmed": cty + " - Date Adjusted"}, inplace=True)

pred=pred.merge(tmp,how='left', on='Date_updated')

cty='Korea, South'
tmp=dfa.loc[dfa.Country==cty,['Date_updated','Confirmed']]
tmp['Date_updted_'+cty]=tmp['Date_updated']
tmp['Date_updated']=tmp['Date_updated'] - tmp.loc[tmp.Confirmed > 1000, 'Date_updated'].min() + s1_cnd
tmp.rename(columns={"Confirmed": cty + " - Date Adjusted"}, inplace=True)

pred=pred.merge(tmp,how='left', on='Date_updated')

cty='China'
tmp=dfa.loc[dfa.Country==cty,['Date_updated','Confirmed']]
tmp['Date_updted_'+cty]=tmp['Date_updated']
tmp['Date_updated']=tmp['Date_updated'] - tmp.loc[tmp.Confirmed > 1000, 'Date_updated'].min() + s1_cnd
tmp.rename(columns={"Confirmed": cty + " - Date Adjusted"}, inplace=True)

pred=pred.merge(tmp,how='left', on='Date_updated')



pred.to_csv('CanadaGrowth'+ dfa.Date_updated.max().strftime('_%m_%d_%Y')+'.csv')
                   
    
    
In [15]:
df_china=dfa[(dfa.Country=='China') & (dfa.Confirmed > 0) ]
df_italy=dfa[(dfa.Country=='Italy') & (dfa.Confirmed > 0)]
df_korea=dfa[(dfa.Country=='Korea, South') & (dfa.Confirmed > 0)]
df_us=dfa[(dfa.Country=='US') & (dfa.Confirmed > 0)]
df_canada=dfa[(dfa.Country=='Canada') & (dfa.Confirmed > 0)]

    
In [32]:
fig = go.Figure()
fig.add_trace(go.Bar(x=df_china.Date_updated,
                y=df_china.Confirmed_added,
                name='China',
                marker_color='rgb(55, 83, 109)'
                ))

fig.add_trace(go.Bar(x=[pd.to_datetime('2020-01-23')], y=[30000], name='Lockdown: Jan 23', marker_color='rgb(256,0,0)'))
fig.add_trace(go.Bar(x=[pd.to_datetime('2020-03-25')], y=[30000], name='Lockdown lift: Mar 25', marker_color='rgb(256,0,0)'))



fig.add_annotation(
            x=pd.to_datetime('2020-01-24'),
            y=14000,
            ax=100,
            ay=0,
            text="Lockdown: Jan 23")

fig.add_annotation(
            x=pd.to_datetime('2020-02-15'),
            y=5000,
            ax=70,
            ay=0,
            text="Peak: Feb 14")

fig.add_annotation(
            x=pd.to_datetime('2020-03-04'),
            y=12000,
            ax=0,
            ay=0,
            text="From lockdown to peak: 21 days",
            font=dict(
                color="black",
                size=20
            ))

fig.add_annotation(
            x=pd.to_datetime('2020-03-25'),
            y=14000,
            ax=-100,
            ay=0,
            text="Lockdown lifted: Mar 25")

fig.update_layout(
       autosize=False,
    width=800,
    height=500,
    margin=dict(
        l=50,
        r=50,
        b=100,
        t=100,
        pad=4
    ),
    
    showlegend=False,
    title='China Daily Cases',
    xaxis_tickfont_size=14,
    xaxis=dict(
    range=pd.to_datetime(['2020-01-15','2020-04-05'])),
    yaxis=dict(
        title='Daily Cases',
        titlefont_size=16,
        tickfont_size=14,
        range=[0,16000]
    ),
    legend=dict(
        x=0,
        y=1.0,
        bgcolor='rgba(255, 255, 255, 0)',
        bordercolor='rgba(255, 255, 255, 0)'
    ),
    barmode='group',
    bargap=0.15, # gap between bars of adjacent location coordinates.
    bargroupgap=0.1, # gap between bars of the same location coordinate.
    annotations=[
        dict(
            
            xref="x",
            yref="y",
            
            showarrow=True,
            arrowhead=1,
            arrowwidth=2
           
        
        )
    ]
    
)
fig.show()
fig.write_image("china"+ dfa.Date_updated.max().strftime('_%m_%d_%Y')+".jpeg")
In [19]:
fig = go.Figure()
fig.add_trace(go.Bar(x=df_korea.Date_updated,
                y=df_korea.Confirmed_added,
                name='Korea',
                marker_color='rgb(55, 83, 109)'
                ))

fig.add_trace(go.Bar(x=[pd.to_datetime('2020-02-25')]
                     , y=[20000], name='Lockdown: Feb 25', marker_color='rgb(256,0,0)'))


fig.add_annotation(
            x=pd.to_datetime('2020-02-26'),
            y=900,
            ax=100,
            ay=0,
            text="Partial Lockdown: Feb 25")

fig.add_annotation(
            x=pd.to_datetime('2020-03-04'),
            y=700,
            ax=70,
            ay=0,
            text="Peak: Mar 3")

fig.add_annotation(
            x=pd.to_datetime('2020-03-15'),
            y=800,
            ax=0,
            ay=0,
            text="From lockdown to peak: 7 days",
            font=dict(
                color="black",
                size=20
            ))

fig.update_layout(
    autosize=False,
    width=800,
    height=500,
    margin=dict(
        l=50,
        r=50,
        b=100,
        t=100,
        pad=4
    ),
    
    
    showlegend=False,
    title='Korea Daily Cases',
    xaxis_tickfont_size=14,
     xaxis=dict(
    range=pd.to_datetime(['2020-02-16','2020-04-05'])),
    yaxis=dict(
        title='Daily Cases',
        titlefont_size=16,
        tickfont_size=14,
        range=[0,1000]
    ),
    legend=dict(
        x=0,
        y=1.0,
        bgcolor='rgba(255, 255, 255, 0)',
        bordercolor='rgba(255, 255, 255, 0)'
    ),
    barmode='group',
    bargap=0.15, # gap between bars of adjacent location coordinates.
    bargroupgap=0.1, # gap between bars of the same location coordinate.
    annotations=[
        dict(
            
            xref="x",
            yref="y",
            
            showarrow=True,
            arrowhead=1,
            arrowwidth=2
           
        
        )
    ]
    
)
fig.show()
fig.write_image("korea"+ dfa.Date_updated.max().strftime('_%m_%d_%Y')+".jpeg")
In [20]:
fig = go.Figure()
fig.add_trace(go.Bar(x=df_italy.Date_updated,
                y=df_italy.Confirmed_added,
                name='Italy',
                marker_color='rgb(55, 83, 109)'
                ))

fig.add_trace(go.Bar(x=[pd.to_datetime('2020-03-09')]
                     , y=[20000],name='lockdown', marker_color='rgb(256,0,0)'))


fig.add_annotation(
            x=pd.to_datetime('2020-03-10'),
            y=7000,
            ax=100,
            ay=0,
            text="Lockdown: Mar 09")

fig.add_annotation(
            x=pd.to_datetime('2020-03-22'),
            y=6500,
            ax=70,
            ay=0,
            text="Peak: Mar 21")

fig.add_annotation(
            x=pd.to_datetime('2020-02-28'),
            y=5000,
            ax=0,
            ay=0,
            text="From lockdown to peak: 12 days",
            font=dict(
                color="black",
                size=18
            ))

fig.update_layout(
       autosize=False,
    width=800,
    height=500,
    margin=dict(
        l=50,
        r=50,
        b=100,
        t=100,
        pad=4
    ),
    
    showlegend=False,
    title='Italy Daily Cases',
    xaxis_tickfont_size=14,
    xaxis=dict(
    range=pd.to_datetime(['2020-02-20','2020-04-05'])),
    yaxis=dict(
        title='Daily Cases',
        titlefont_size=16,
        tickfont_size=14,
        range=[0,9000]
    ),
    legend=dict(
        x=0,
        y=1.0,
        bgcolor='rgba(255, 255, 255, 0)',
        bordercolor='rgba(255, 255, 255, 0)'
    ),
    barmode='group',
    bargap=0.15, # gap between bars of adjacent location coordinates.
    bargroupgap=0.1, # gap between bars of the same location coordinate.
    annotations=[
        dict(
            
            xref="x",
            yref="y",
            
            showarrow=True,
            arrowhead=1,
            arrowwidth=2
           
        
        )
    ]
    
)
fig.show()
fig.write_image("italy"+ dfa.Date_updated.max().strftime('_%m_%d_%Y')+".jpeg")
In [21]:
fig = go.Figure()
fig.add_trace(go.Bar(x=df_us.Date_updated,
                y=df_us.Confirmed_added,
                name='US',
                marker_color='rgb(55, 83, 109)'
                ))

fig.add_trace(go.Bar(x=pd.to_datetime(['2020-02-29','2020-03-19'])
                     , y=[25000,25000],name='events', marker_color='rgb(256,0,0)'))


fig.add_annotation(
            x=pd.to_datetime('2020-02-29'),
            y=20000,
            ax=100,
            ay=0,
            text="WA: State of Emergency")

fig.add_annotation(
            x=pd.to_datetime('2020-03-19'),
            y=15000,
            ax=-150,
            ay=0,
            text="More states lockdown; Travel bans")



fig.update_layout(
       autosize=False,
    width=800,
    height=500,
    margin=dict(
        l=50,
        r=50,
        b=100,
        t=100,
        pad=4
    ),
    
    showlegend=False,
    title='US Daily Cases',
    xaxis_tickfont_size=14,
    xaxis=dict(
    range=pd.to_datetime(['2020-02-20','2020-04-05'])),
    yaxis=dict(
        title='Daily Cases',
        titlefont_size=16,
        tickfont_size=14,
        range=[0,22000]
    ),
    legend=dict(
        x=0,
        y=1.0,
        bgcolor='rgba(255, 255, 255, 0)',
        bordercolor='rgba(255, 255, 255, 0)'
    ),
    barmode='group',
    bargap=0.15, # gap between bars of adjacent location coordinates.
    bargroupgap=0.1, # gap between bars of the same location coordinate.
    annotations=[
        dict(
            
            xref="x",
            yref="y",
            
            showarrow=True,
            arrowhead=1,
            arrowwidth=2
           
        
        )
    ]
    
)
fig.show()
fig.write_image("us"+ dfa.Date_updated.max().strftime('_%m_%d_%Y')+".jpeg")
In [22]:
fig = go.Figure()
fig.add_trace(go.Bar(x=df_canada.Date_updated,
                y=df_canada.Confirmed_added,
                name='Canada',
                marker_color='rgb(55, 83, 109)'
                ))

fig.add_trace(go.Bar(x=pd.to_datetime(['2020-02-26','2020-03-13','2020-03-17','2020-03-25'])
                     , y=[20000,20000,20000,20000],name='lockdown', marker_color='rgb(256,0,0)'))

fig.add_annotation(
            x=pd.to_datetime('2020-02-26'),
            y=1200,
            ax=100,
            ay=0,
            text="Warning from Minister of Health",
            font=dict( size=9))

fig.add_annotation(
            x=pd.to_datetime('2020-03-13'),
            y=900,
            ax=-100,
            ay=0,
            text="Recommendation against Intl. travel",
            font=dict( size=9))

fig.add_annotation(
            x=pd.to_datetime('2020-03-17'),
            y=1000,
            ax=50,
            ay=0,
            text="Travel bans",
            font=dict( size=9))

fig.add_annotation(
            x=pd.to_datetime('2020-03-17'),
            y=900,
            ax=90,
            ay=0,
            text="State of Emergency by Prov.",
            font=dict( size=9)
)

fig.add_annotation(
            x=pd.to_datetime('2020-03-25'),
            y=1200,
            ax=-50,
            ay=0,
            text="Quarantine Act",
    font=dict( size=9)
           )

fig.update_layout(
    showlegend=False,
    title='Canada Daily Cases',
    width=800,
    height=500,
    xaxis_tickfont_size=14,
    xaxis=dict(
    range=pd.to_datetime(['2020-02-20','2020-04-05'])),
    yaxis=dict(
        title='Daily Cases',
        titlefont_size=16,
        tickfont_size=14,
        range=[0,1500]
    ),
    legend=dict(
        x=0,
        y=1.0,
        bgcolor='rgba(255, 255, 255, 0)',
        bordercolor='rgba(255, 255, 255, 0)'
    ),
    barmode='group',
    bargap=0.15, # gap between bars of adjacent location coordinates.
    bargroupgap=0.1, # gap between bars of the same location coordinate.
    annotations=[
        dict(
            
            xref="x",
            yref="y",
            
            showarrow=True,
            arrowhead=1,
            arrowwidth=2
           
        
        )
    ]
    
)
fig.show()
fig.write_image("canada"+ dfa.Date_updated.max().strftime('_%m_%d_%Y')+".jpeg")
In [27]:
df_mod=pd.DataFrame({'Country':dfa.Country.unique()})
df_mod['current_date']=pd.to_datetime('2020-01-01')
df_mod['current_case']=0
df_mod['slope']=0


for i, row in df_mod.iterrows():
    tmp=dfa[(dfa.Country==row['Country']) & (dfa.Confirmed <=4000)].copy().tail(5)
    reg=LinearRegression().fit(tmp['StartDate'][:,np.newaxis],np.log10(tmp['Confirmed']))
    #df_mod['slope'].iloc[i]=reg.coef_
    df_mod.loc[i,'slope']=reg.coef_
    #df_mod['current_date'].iloc[i]=tmp['Date_updated'].max()
    df_mod.loc[i,'current_date']=tmp['Date_updated'].max()
    #df_mod['current_case'].iloc[i]=tmp['Confirmed'].max()
    df_mod.loc[i,'current_case']=tmp['Confirmed'].max()
    
In [28]:
fig=px.scatter(df_mod,x='current_case',y='slope',color='Country',log_y=True, range_x=[0.01,10000])
fig.show()
In [23]:
df_mod[df_mod['Country']=='Germany']
Out[23]:
Country current_date current_case slope
21 Germany 2020-03-13 3675 0.114389
In [24]:
fig=px.line({'Date_updated': ['2020-03-15','2020-04-02'], 'forecast': [10,10]},
            x='Date_updated',y='forecast',log_y=True,
            range_x=['2020-03-15','2020-03-31'],range_y=[100,500000])

clist=[
    '#1f77b4',  # muted blue
    '#ff7f0e',  # safety orange
    '#2ca02c',  # cooked asparagus green
    '#d62728',  # brick red
    '#9467bd',  # muted purple
    '#8c564b',  # chestnut brown
    '#e377c2',  # raspberry yogurt pink
    '#7f7f7f',  # middle gray
    '#bcbd22',  # curry yellow-green
    '#17becf'   # blue-teal
]
for i, ctry in enumerate(countrylist):
    fig.add_scatter(x=forecast[forecast.Country==ctry]['Date_updated'],
                   y=forecast[forecast.Country==ctry]['forecast'],mode="lines",
                   line=dict(width=2,color=clist[i]),name=ctry + ' forecast')
    fig.add_scatter(x=dfa[dfa.Country==ctry]['Date_updated'],
                y=dfa[dfa.Country==ctry]['Confirmed'],mode="markers",
                marker=dict(size=10, color=clist[i]),
                name=ctry
                )

fig.show()
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-24-6ea91c51613c> in <module>
     16 ]
     17 for i, ctry in enumerate(countrylist):
---> 18     fig.add_scatter(x=forecast[forecast.Country==ctry]['Date_updated'],
     19                    y=forecast[forecast.Country==ctry]['forecast'],mode="lines",
     20                    line=dict(width=2,color=clist[i]),name=ctry + ' forecast')

NameError: name 'forecast' is not defined
In [138]:
fig=px.line({'Date_updated': ['2020-03-15','2020-04-02'], 'forecast': [10,10]},
            x='Date_updated',y='forecast',
            range_x=['2020-03-15','2020-03-31'],range_y=[100,150000])

clist=[
    '#1f77b4',  # muted blue
    '#ff7f0e',  # safety orange
    '#2ca02c',  # cooked asparagus green
    '#d62728',  # brick red
    '#9467bd',  # muted purple
    '#8c564b',  # chestnut brown
    '#e377c2',  # raspberry yogurt pink
    '#7f7f7f',  # middle gray
    '#bcbd22',  # curry yellow-green
    '#17becf'   # blue-teal
]
for i, ctry in enumerate(countrylist):
    fig.add_scatter(x=forecast[forecast.Country==ctry]['Date_updated'],
                   y=forecast[forecast.Country==ctry]['forecast'],mode="lines",
                   line=dict(width=2,color=clist[i]),name=ctry + ' forecast')
    fig.add_scatter(x=dfa[dfa.Country==ctry]['Date_updated'],
                y=dfa[dfa.Country==ctry]['Confirmed'],mode="markers",
                marker=dict(size=10, color=clist[i]),
                name=ctry
                )

fig.show()
In [109]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Create figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_x": True}]])
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-109-d433594424f8> in <module>
      3 
      4 # Create figure with secondary y-axis
----> 5 fig = make_subplots(specs=[[{"secondary_x": True}]])

/opt/anaconda3/lib/python3.7/site-packages/plotly/subplots.py in make_subplots(rows, cols, shared_xaxes, shared_yaxes, start_cell, print_grid, horizontal_spacing, vertical_spacing, subplot_titles, column_widths, row_heights, specs, insets, column_titles, row_titles, x_title, y_title, **kwargs)
    453         type="xy", secondary_y=False, colspan=1, rowspan=1, l=0.0, r=0.0, b=0.0, t=0.0
    454     )
--> 455     _check_keys_and_fill("specs", specs, spec_defaults)
    456 
    457     # Validate secondary_y

/opt/anaconda3/lib/python3.7/site-packages/plotly/subplots.py in _check_keys_and_fill(name, arg, defaults)
    418                 # 2D list
    419                 for arg_ii in arg_i:
--> 420                     _checks(arg_ii, defaults)
    421             elif isinstance(arg_i, dict):
    422                 # 1D list

/opt/anaconda3/lib/python3.7/site-packages/plotly/subplots.py in _checks(item, defaults)
    408 make_subplots: {k}
    409     Valid keys include: {valid_keys}""".format(
--> 410                             k=repr(k), name=name, valid_keys=repr(list(defaults))
    411                         )
    412                     )

ValueError: 
Invalid key specified in an element of the 'specs' argument to make_subplots: 'secondary_x'
    Valid keys include: ['type', 'secondary_y', 'colspan', 'rowspan', 'l', 'r', 'b', 't']
In [115]:
import plotly.graph_objects as go

fig = go.Figure()

fig.add_trace(go.Scatter(
    x=[1, 2, 3],
    y=[4, 5, 6],
    name="yaxis1 data"
))


fig.add_trace(go.Scatter(
    x=[2, 3, 4],
    y=[40, 50, 60],
    name="xaxis2 data",
    xaxis="x2"
))




# Create axis objects
fig.update_layout(
    xaxis=dict(
        domain=[0.3, 0.7]
    ),
    yaxis=dict(
        title="yaxis title",
        titlefont=dict(
            color="#1f77b4"
        ),
        tickfont=dict(
            color="#1f77b4"
        )
    ),
    xaxis2=dict(
        title="xaxis2 title",
        titlefont=dict(
            color="#ff7f0e"
        ),
        tickfont=dict(
            color="#ff7f0e"
        ),
        anchor="free",
        overlaying="x",
        side="top",
        position=1.
    )
 
    
)

# Update layout properties
fig.update_layout(
    title_text="multiple y-axes example",
    width=800,
)

fig.show()
In [141]:
#countrylist=allcountrylist[:10]
#countrylist.append('Japan')
countrylist=[
'Spain',
 'Germany',
 'Iran',
 'France',
 'Switzerland',
 'United Kingdom',
 'Japan',
 'Singapore'  ]

#countrylist.remove('China').remove('Korea, South').remove('Italy')

fig = go.Figure()

fig.add_trace(go.Scatter(
    x=dfa.loc[dfa.Country=='China','StartDate'],
    y=dfa.loc[dfa.Country=='China','Confirmed'],
    name="China",
    xaxis="x1"
))


fig.add_trace(go.Scatter(
    x=dfa.loc[dfa.Country=='Korea, South','StartDate'],
    y=dfa.loc[dfa.Country=='Korea, South','Confirmed'],
    name="Korea, South",
    xaxis="x1"
))

fig.add_trace(go.Scatter(
    x=dfa.loc[dfa.Country=='Korea, South','Date_updated'],
    y=dfa.loc[dfa.Country=='Korea, South','Confirmed'],
    name="Korea, South",
    xaxis="x2"
   # range_x=['2020-03-11','2020-05-09']
))


fig.update_layout(
    xaxis=dict(
        domain=[0.1, 0.7],
        range=[1,60]
    ),
    yaxis=dict(
        title="yaxis title",
        type='log',
        titlefont=dict(
            color="#1f77b4"
        ),
        tickfont=dict(
            color="#1f77b4"
        )
    ),
    xaxis2=dict(
        range=pd.to_datetime(['2020-03-11','2020-05-09']).strftime('%m/%d/%Y'),
        title="xaxis2 title",
        titlefont=dict(
            color="#ff7f0e"
        ),
        tickfont=dict(
            color="#ff7f0e"
        ),
        anchor="free",
        overlaying="x",
        side="top",
        position=1.
    )
 
    
)

# Update layout properties
fig.update_layout(
    title_text="multiple y-axes example",
    width=1000
)

fig.show()
In [120]:
dfa[(dfa.Country=='Canada') & (dfa.Confirmed > 0)]
Out[120]:
Country Date_updated Confirmed Death StartDate
1802 Canada 2020-01-26 1 0 -44
1803 Canada 2020-01-27 1 0 -43
1804 Canada 2020-01-28 2 0 -42
1805 Canada 2020-01-29 2 0 -41
1806 Canada 2020-01-30 2 0 -40
1807 Canada 2020-01-31 4 0 -39
1808 Canada 2020-02-01 4 0 -38
1809 Canada 2020-02-02 4 0 -37
1810 Canada 2020-02-03 4 0 -36
1811 Canada 2020-02-04 4 0 -35
1812 Canada 2020-02-05 5 0 -34
1813 Canada 2020-02-06 5 0 -33
1814 Canada 2020-02-07 7 0 -32
1815 Canada 2020-02-08 7 0 -31
1816 Canada 2020-02-09 7 0 -30
1817 Canada 2020-02-10 7 0 -29
1818 Canada 2020-02-11 7 0 -28
1819 Canada 2020-02-12 7 0 -27
1820 Canada 2020-02-13 7 0 -26
1821 Canada 2020-02-14 7 0 -25
1822 Canada 2020-02-15 7 0 -24
1823 Canada 2020-02-16 7 0 -23
1824 Canada 2020-02-17 8 0 -22
1825 Canada 2020-02-18 8 0 -21
1826 Canada 2020-02-19 8 0 -20
1827 Canada 2020-02-20 8 0 -19
1828 Canada 2020-02-21 9 0 -18
1829 Canada 2020-02-22 9 0 -17
1830 Canada 2020-02-23 9 0 -16
1831 Canada 2020-02-24 10 0 -15
1832 Canada 2020-02-25 11 0 -14
1833 Canada 2020-02-26 11 0 -13
1834 Canada 2020-02-27 13 0 -12
1835 Canada 2020-02-28 14 0 -11
1836 Canada 2020-02-29 20 0 -10
1837 Canada 2020-03-01 24 0 -9
1838 Canada 2020-03-02 27 0 -8
1839 Canada 2020-03-03 30 0 -7
1840 Canada 2020-03-04 33 0 -6
1841 Canada 2020-03-05 37 0 -5
1842 Canada 2020-03-06 49 0 -4
1843 Canada 2020-03-07 54 0 -3
1844 Canada 2020-03-08 64 0 -2
1845 Canada 2020-03-09 77 1 -1
1846 Canada 2020-03-10 79 1 0
1847 Canada 2020-03-11 108 1 1
1848 Canada 2020-03-12 117 1 2
1849 Canada 2020-03-13 193 1 3
1850 Canada 2020-03-14 198 1 4
1851 Canada 2020-03-15 252 1 5
1852 Canada 2020-03-16 415 4 6
1853 Canada 2020-03-17 478 5 7
1854 Canada 2020-03-18 657 8 8
1855 Canada 2020-03-19 800 9 9
1856 Canada 2020-03-20 943 12 10
1857 Canada 2020-03-21 1277 19 11
1858 Canada 2020-03-22 1469 21 12
1859 Canada 2020-03-23 2088 25 13
In [49]:
df_now.sort_values('Confirmed',ascending=False,inplace=True)
In [51]:
df_now[:10]
Out[51]:
Country Confirmed Death
33 China 81498 3274
80 Italy 63927 6077
157 US 43847 557
142 Spain 35136 2311
62 Germany 29056 123
76 Iran 23049 1812
58 France 20123 862
86 Korea, South 8961 111
147 Switzerland 8795 120
161 United Kingdom 6726 336
In [330]:
#fig=px.line(log_y=True,range_x=[0,40],range_y=[100,100000],width=1000, height=500)
#fig=px.line(dfa[dfa.Country.isin(countrylist)],x='StartDate',y='Confirmed',color='Country',log_y=True
#            ,range_x=[0,40],range_y=[100,100000])
tmp=dfa[dfa.Country.isin(df_now[df_now.Confirmed > 2000]['Country'])]
px.line(tmp,x='StartDate',y='Confirmed',color='Country',log_y=False, range_x=[0,20], range_y=[100,5000])
In [336]:
tmp=dfa[dfa.Country.isin(df_now[df_now.Confirmed > 1000]['Country'])]
px.line(tmp,x='Date_updated',y='Confirmed',color='Country',log_y=True, range_y=[100,10000],
       range_x=['2020-02-16','2020-03-25'])
In [339]:
EU_country=['Germany','Poland','Romania','Netherlands','Greece','Belgium','Portugal','Czechia','Hungary','Sweden']

tmp=dfa[dfa.Country.isin(EU_country)]
px.line(tmp,x='Date_updated',y='Confirmed',color='Country',log_y=True, range_x=['2020-02-23','2020-03-27'], range_y=[100,5000])
In [340]:
EU_country=['Germany','Poland','Romania','Netherlands','Greece','Belgium','Portugal','Czechia','Hungary','Sweden']

tmp=dfa[dfa.Country.isin(EU_country)]
px.line(tmp,x='StartDate',y='Confirmed',color='Country',log_y=True, range_x=[0,20], range_y=[100,5000])
In [3]:
src="https://data.ontario.ca/dataset/f4112442-bdc8-45d2-be3c-12efae72fb27/resource/455fd63b-603d-4608-8216-7d8647f43350/download/conposcovidloc.csv"
on=pd.read_csv(src)
In [4]:
df=on[['Reported date','Health Unit City','Health Unit Latitude','Health Unit Longitude']]
df.rename(columns={'Reported date':'date','Health Unit City':'City','Health Unit Latitude':'Lat','Health Unit Longitude':'Long'},inplace=True)
/opt/anaconda3/envs/jlab/lib/python3.6/site-packages/pandas/core/frame.py:4133: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

In [5]:
df=on[['Reported date','Health Unit City','Health Unit Latitude','Health Unit Longitude']]
df.rename(columns={'Reported date':'date','Health Unit City':'City','Health Unit Latitude':'Lat','Health Unit Longitude':'Long'},inplace=True)
df.City.fillna('Unknown',inplace=True)
df.Lat.fillna(df.Lat.mean(),inplace=True)
df.Long.fillna(df.Long.mean(),inplace=True)


dfc=df.groupby(['date','City','Lat','Long'])['date'].count().reset_index(name='dailycase')

dfc=dfc.sort_values(['City','Lat','Long','date'])

dfc['total_count']=dfc.groupby(['City','Lat','Long']).cumsum()
/opt/anaconda3/envs/jlab/lib/python3.6/site-packages/pandas/core/generic.py:6245: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

In [11]:
on['Reported date'].value_counts()
Out[11]:
2020-03-26    170
2020-03-25    100
2020-03-24     85
2020-03-23     78
2020-03-20     60
2020-03-21     59
2020-03-22     48
2020-03-19     44
2020-03-15     42
2020-03-16     32
2020-03-18     25
2020-03-14     24
2020-03-13     20
2020-03-12     17
2020-03-17     12
2020-03-11      6
2020-03-06      6
2020-03-09      4
2020-03-01      4
2020-03-02      3
2020-02-29      3
2020-03-08      3
2020-03-05      2
2020-02-28      2
2020-03-03      2
2020-02-27      1
2020-03-10      1
2020-02-23      1
2020-01-25      1
2020-02-26      1
2020-01-31      1
2020-01-27      1
Name: Reported date, dtype: int64
In [357]:
dfc['size'] = dfc['total_count'].pow(0.3) * 2

fig = px.scatter_geo(dfc, 
                     lat="Lat", lon="Long", 
                     
        
                     color="total_count", size='size', hover_name="City", 
                     range_color=[1,200],
                     projection="natural earth", animation_frame="date", 
                     title='COVID-19: Cases Over Time', color_continuous_scale="greens"
                    )
fig.show()
In [40]:
dfa[(dfa.Country=='Canada') & (dfa.Confirmed > 3000) & (dfa.Confirmed < 5000)][['Date_updated','Confirmed','Death']]
Out[40]:
Date_updated Confirmed Death
1948 2020-03-25 3251 30
1949 2020-03-26 4042 38
In [ ]: